library(spatstat)
library(here)
library(sp)
library(maptools)
library(tmap)
library(sf)
library(tmaptools)
library(dplyr)
library(readr)
library(glue)
library(spdep)
SchoolData <- read_csv(here::here('data', 'school_data_Google_location_highschool_cleaned.csv'),
na = c("NA", "n/a", ""))
SchoolData <- SchoolData %>% dplyr::filter(!is.na(hu_midlevel))
SchoolData <- SchoolData[!duplicated(SchoolData[ , c('school_coords','hist_midlevel', 'eng_lang_midlevel', 'hu_midlevel', 'math_midlevel')]),]
projcrs <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
SchoolData <- st_as_sf(x= SchoolData,
coords=c('longitude','latitude'),
crs=projcrs)
st_crs(SchoolData) <- projcrs
SchoolData <- SchoolData %>%
st_transform(.,23700)
TopCities <- read_csv("https://simplemaps.com/static/data/country-cities/hu/hu.csv")
TopCities <- st_as_sf(x= TopCities,
coords=c('lng','lat'),
crs=projcrs)
st_crs(TopCities) <- projcrs
### Hungary Shape
HungaryShapeMap <- st_read(here::here('data', 'gadm36_HUN_shp','gadm36_HUN_2.shp'))
## Reading layer `gadm36_HUN_2' from data source `C:\Users\botiv\OneDrive - University College London\CASA_2020\Modules\GIS and Science\Assessment\data\gadm36_HUN_shp\gadm36_HUN_2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 168 features and 13 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 16.11105 ymin: 45.74783 xmax: 22.90558 ymax: 48.58638
## geographic CRS: WGS 84
## Hungary's epsg: 23700
HungaryShapeMap <- HungaryShapeMap %>%
st_transform(., 23700)
HungarySchools <- HungaryShapeMap%>%
st_join(SchoolData)%>%
add_count(GID_2)%>%
janitor::clean_names()%>%
mutate(area=st_area(.))%>%
mutate(density=n/area)
lang_cols = c('eng','hebrew','spanish','lovari','holland','slovakian','esperanto','croatian','greek','chinese','german','romanian','french','polish','portuguese','italian','japanese','serbian','russian','beas')
HungaryLanugageAll <- HungaryShapeMap
for (l in lang_cols) {
var_lang_a_prodsum =rlang::sym(paste(l,"_lang_a_prodsum",sep = ""))
var_lang_midlevel_prodsum =rlang::sym(paste(l,"_lang_midlevel_prodsum",sep = ""))
var_prodsum =rlang::sym(paste(l,"_prodsum",sep = ""))
var_stud_count =rlang::sym(paste(l,"_stud_count",sep = ""))
var_lang_a <- rlang::sym(paste(l,"_lang_a",sep = ""))
var_lang_a_percent <- rlang::sym(paste(l,"_lang_a_percent",sep = ""))
var_lang_midlevel <- rlang::sym(paste(l,"_lang_midlevel",sep = ""))
var_lang_midlevel_percent <- rlang::sym(paste(l,"_lang_midlevel_percent",sep = ""))
var_score <- rlang::sym(paste(l,"_score",sep = ""))
### Creating extra columns
HungarySchools <- HungarySchools %>%
mutate(., (!!var_lang_a_prodsum) := ifelse((!!var_lang_a_percent) >= 45, ((!!var_lang_a_percent) + 50) * (!!var_lang_a) , (!!var_lang_a_percent) * (!!var_lang_a))) %>%
mutate(., (!!var_lang_midlevel_prodsum) := (!!var_lang_midlevel) * (!!var_lang_midlevel_percent)) %>%
mutate(., (!!var_prodsum ):= ifelse(is.na((!!var_lang_a_prodsum)), 0, (!!var_lang_a_prodsum)) + ifelse(is.na((!!var_lang_midlevel_prodsum)), 0, (!!var_lang_midlevel_prodsum))) %>%
mutate(., (!!var_stud_count) := ifelse(is.na((!!var_lang_a)), 0, (!!var_lang_a)) + ifelse(is.na((!!var_lang_midlevel)), 0, (!!var_lang_midlevel)))
## Zeros are NA -
HungarySchools[[var_stud_count]] <- HungarySchools[[var_stud_count]] %>% replace(.==0, NA)
HungarySchools[[var_prodsum]] <- HungarySchools[[var_prodsum]] %>% replace(.==0, NA)
### Groupby
HungarySchoolsLanguage<- HungarySchools %>%
group_by(gid_2) %>%
summarise(
name_2= first(name_2),
!!var_prodsum := sum(!!var_prodsum , na.rm = TRUE),
!!var_stud_count := sum(!!var_stud_count, na.rm = TRUE),
) %>%
mutate(., !!var_score := (!!var_prodsum) / (!!var_stud_count))
### Merging them into a large Dataset with all languages - covert them to data.frame - later we convert them back
HungaryLanugageAll<- merge(HungaryLanugageAll %>% as.data.frame(),HungarySchoolsLanguage %>% as.data.frame(), by.x="GID_2", by.y="gid_2", no_dups=TRUE )
#print(l)
}
## Prodsum cols
prodsum_cols <- HungaryLanugageAll %>%
select(., contains('_prodsum')) %>%
colnames(.)
## Number of Student columns
stud_cols <- HungaryLanugageAll %>%
select(., contains('_stud_count')) %>%
colnames(.)
### Creating over all columns prodsum, all student, score
HungaryLanugageAll$all_prodsum <- rowSums(HungaryLanugageAll[prodsum_cols], na.rm = TRUE)
HungaryLanugageAll$all_stud <- rowSums(HungaryLanugageAll[stud_cols], na.rm = TRUE)
### Convert back to SF
HungaryLanugageAll <- st_as_sf(HungaryLanugageAll)
## Final Language Score column
HungaryLanugageAll <- HungaryLanugageAll %>%
mutate(., all_score = all_prodsum / all_stud )
sum(HungaryLanugageAll$all_stud)
## [1] 59155
### MAP OF THE LANGUAGE TOTAL SCORE
tm_shape(HungaryLanugageAll) +
tm_polygons(col="all_score",
style = "jenks",
palette = c('#ffffff','#ebf0ff','#bdd7e7' , '#3182bd', '#08519c'),
#breaks = c(0, 50, 70, 80,110),
midpoint=NA,
legend.hist = TRUE,
border.alpha = 0.3,
popup.vars=c("NAME_2", "all_score"),
title="Language Score") +
tm_shape(slice_head(TopCities, n=10)) +
tm_dots(col = 'red', legend.z = 'city' , size = 0.2) +
#tm_symbols(col = 'grey' , size = 'population') +
tm_shape(slice_head(TopCities, n=1)) +
tm_text('city',col='white',shadow = FALSE , size=0.6, ymod = 0.3,fontface="bold") +
tm_layout( legend.hist.bg.color = '#dedede',
legend.title.size = 1,
legend.outside.size=0.5,
legend.outside = TRUE,
legend.outside.position = 'right',
frame=FALSE
) +
tm_scale_bar(position = c("right", "bottom"), text.size = 0.40)+
tm_compass(type = "8star", size = 1, position = c(0.83, 0.15))
### MAP OF THE LANGUAGE TOTAL SCORE
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(HungaryLanugageAll) +
tm_polygons(col="all_score",
style = "jenks",
palette = c('#ffffff','#ebf0ff','#bdd7e7' , '#3182bd', '#08519c'),
#breaks = c(0, 50, 70, 80,110),
midpoint=NA,
legend.hist = TRUE,
border.alpha = 0.3,
popup.vars=c("NAME_2", "all_score"),
title="Language Score") +
tm_shape(slice_head(TopCities, n=10)) +
tm_dots(col = 'red', legend.z = 'city' , size = 0.2) +
#tm_symbols(col = 'grey' , size = 'population') +
tm_shape(slice_head(TopCities, n=1)) +
tm_text('city',col='white',shadow = FALSE , size=0.6, ymod = 0.3,fontface="bold") +
tm_layout( legend.hist.bg.color = '#dedede',
legend.title.size = 1,
legend.outside.size=0.5,
legend.outside = TRUE,
legend.outside.position = 'right',
frame=FALSE
)
## Calculate the SELE language exams average by school
HungaryAverage = sum(HungaryLanugageAll$all_prodsum) / sum(HungaryLanugageAll$all_stud)
## Create the LQ column
HungaryLanugageAll$all_score_LQ<-ifelse(is.na(HungaryLanugageAll$all_score), NA, HungaryLanugageAll$all_score/HungaryAverage)
#https://colorbrewer2.org/#type=sequential&scheme=Purples&n=5
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(HungaryLanugageAll) +
tm_polygons(col="all_score_LQ",
style = "jenks",
palette = 'RdBu',
#breaks = c(0, 50, 70, 80,110),
midpoint=NA,
legend.hist = TRUE,
border.alpha = 0.3,
popup.vars=c("NAME_2", "all_score"),
title="Language Score Location Quotient") +
tm_shape(slice_head(TopCities, n=10)) +
tm_dots(col = 'red', legend.z = 'city' , size = 0.2) +
#tm_symbols(col = 'grey' , size = 'population') +
tm_shape(slice_head(TopCities, n=1)) +
tm_text('city',col='white',shadow = FALSE , size=0.6, ymod = 0.3,fontface="bold") +
tm_layout( legend.hist.bg.color = '#dedede',
legend.title.size = 1,
legend.outside.size=0.5,
legend.outside = TRUE,
legend.outside.position = 'right',
frame=FALSE
) +
tm_scale_bar(position = c("right", "bottom"), text.size = 0.40)+
tm_compass(type = "8star", size = 1, position = c(0.83, 0.15))
### Location Quotient calucaltion - Interactive
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(HungaryLanugageAll) +
tm_polygons(col="all_score_LQ",
style = "jenks",
palette = 'RdBu',
#breaks = c(0, 50, 70, 80,110),
midpoint=NA,
legend.hist = TRUE,
border.alpha = 0.3,
popup.vars=c("NAME_2", "all_score"),
title="Language Score Location Quotient") +
tm_shape(slice_head(TopCities, n=10)) +
tm_dots(col = 'red', legend.z = 'city' , size = 0.2) +
#tm_symbols(col = 'grey' , size = 'population') +
tm_shape(slice_head(TopCities, n=1)) +
tm_text('city',col='white',shadow = FALSE , size=0.6, ymod = 0.3,fontface="bold") +
tm_layout( legend.hist.bg.color = '#dedede',
legend.title.size = 1,
legend.outside.size=0.5,
legend.outside = TRUE,
legend.outside.position = 'right',
frame=FALSE
)
## Drop NaNs
HungaryLanugageAll <- HungaryLanugageAll %>% dplyr::filter(!is.na(all_score))
coordsW <- HungaryLanugageAll%>%
st_centroid()%>%
st_geometry()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
plot(coordsW,axes=TRUE)
#create a neighbours list
HungaryLanugageAll_nb <- HungaryLanugageAll %>%
spdep::poly2nb(., queen=T)
### Plot
tmap_mode("plot")
## tmap mode set to plotting
plot(HungaryLanugageAll_nb, st_geometry(coordsW), col="red")
#add a map underneath
plot(HungaryLanugageAll$geometry, add=T)
#create a spatial weights object from these weights
HungaryLanugageAll.lw <- HungaryLanugageAll_nb %>%
spdep::nb2listw(., style="C")
### Moran's I
##all score the variable
I_HungaryLanugageAll_Global_Density <- HungaryLanugageAll %>%
pull(all_score) %>%
as.vector()%>%
spdep::moran.test(., HungaryLanugageAll.lw)
HungaryLanugageAll.lw
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 158
## Number of nonzero links: 820
## Percentage nonzero weights: 3.28473
## Average number of links: 5.189873
##
## Weights style: C
## Weights constants summary:
## n nn S0 S1 S2
## C 158 24964 158 60.8878 681.9434
I_HungaryLanugageAll_Global_Density
##
## Moran I test under randomisation
##
## data: .
## weights: HungaryLanugageAll.lw
##
## Moran I statistic standard deviate = 4.3938, p-value = 5.569e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.206107125 -0.006369427 0.002338512
Gi_FL_score_Local_Density <- HungaryLanugageAll %>%
pull(all_score) %>%
as.vector()%>%
localG(., HungaryLanugageAll.lw)
HungaryLanugageAll <- HungaryLanugageAll %>%
mutate(density_G = as.numeric(Gi_FL_score_Local_Density))
breaks1<-c(-1000,-2.58,-1.96,-1.65,1.65,1.96,2.58,1000)
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(HungaryLanugageAll) +
tm_polygons("density_G",
palette="-RdBu",
style="fixed",
breaks=breaks1,
midpoint=NA,
title="Gi*, FL capabilities Hot-Cold Spot in Hungary")+
tm_shape(slice_head(TopCities, n=10)) +
tm_dots(col = 'red', legend.z = 'city' , size = 0.2) +
#tm_symbols(col = 'grey' , size = 'population') +
tm_shape(slice_head(TopCities, n=1)) +
tm_text('city',col='white',shadow = FALSE , size=0.6, ymod = 0.3,fontface="bold") +
tm_layout( legend.hist.bg.color = '#dedede',
legend.title.size = 1,
legend.outside.size=0.5,
legend.outside = TRUE,
legend.outside.position = 'right',
frame=FALSE
) +
tm_scale_bar(position = c("right", "bottom"), text.size = 0.40)+
tm_compass(type = "8star", size = 1, position = c(0.83, 0.15))